# Setup environment ----
library(data.table)
library(bigKRLS)
source("R/functions.R")
results_path <- "results/replication/"
load("data/data-2021-02-02.RData")
load(paste0(results_path, "senate_krls_cv.RData"))
load(paste0(results_path, "senate_svm.RData"))
load(paste0(results_path, "senate_rf.RData"))
load(paste0(results_path, "senate_lasso.RData"))
load(paste0(results_path, "house_krls_cv.RData"))
load(paste0(results_path, "house_svm.RData"))
load(paste0(results_path, "house_rf.RData"))
load(paste0(results_path, "house_lasso.RData"))



# ranges of random forest marginal effect estimates ----
# these should be bounded by [-1,1], but they are not
house_rf_data[, range(d_dem_spend_adv)]
senate_rf_data[, range(d_dem_spend_adv)]
# > 2% of ME estimates are out of bounds in the House
house_rf_data[, mean(abs(d_dem_spend_adv) > 1)]
senate_rf_data[, mean(abs(d_dem_spend_adv) > 1)]

# this leads to astronomical estimates for the effectiveness of spending
ggplot(house_rf_data[dem_spend_adv != 0],
  aes(dem_spend_adv, d_dem_spend_adv)) + geom_smooth() +
  theme(text = element_text(family = "sans"))
ggplot(senate_rf_data[dem_spend_adv != 0],
  aes(dem_spend_adv, d_dem_spend_adv)) + geom_smooth() +
  theme(text = element_text(family = "sans"))

# when we drop those outliers, we get estimates that match other methods
ggplot(house_rf_data[abs(d_dem_spend_adv) < 1 & dem_spend_adv != 0],
  aes(dem_spend_adv, d_dem_spend_adv)) + geom_smooth() +
  theme(text = element_text(family = "sans"))
ggplot(senate_rf_data[abs(d_dem_spend_adv) < 1 & dem_spend_adv != 0],
  aes(dem_spend_adv, d_dem_spend_adv)) + geom_smooth() +
  theme(text = element_text(family = "sans"))


loess_house <- house_svm_data[incl == TRUE,
  loess(d_dem_spend_adv ~ dem_spend_adv)]
integrate(function(x) predict(loess_house, x),
  lower = house_svm_data[incl == TRUE, min(dem_spend_adv)],
  upper = house_svm_data[incl == TRUE, max(dem_spend_adv)])

loess_senate <- senate_svm_data[incl == TRUE,
  loess(d_dem_spend_adv ~ dem_spend_adv)]
integrate(function(x) predict(loess_senate, x),
  lower = senate_svm_data[incl == TRUE, min(dem_spend_adv)],
  upper = senate_svm_data[incl == TRUE, max(dem_spend_adv)])




#     LASSO with polynomial in spending advantage ----
load(paste0(results_path, "sen_lasso_poly.RData"))
load(paste0(results_path, "house_lasso_poly.RData"))

round(mean(cv_rmse(cv_lasso_poly_sen, sen_folds, sen_y)), 3)
round(mean(cv_R2(cv_lasso_poly_sen, sen_folds, sen_y)), 2)

round(mean(cv_rmse(cv_lasso_poly_hou, hou_folds, hou_y)), 3)
round(mean(cv_R2(cv_lasso_poly_hou, hou_folds, hou_y)), 2)





# Senate and House KRLS w/ lagged outcome + lagged spending ----
load.bigKRLS(paste0(results_path, "senate_krls_model_lag2"),
  newname = "senate_krls_model_lag2")
senate_krls_sum_lag2 <- summary(senate_krls_model_lag2)
senate_krls_sum_lag2$ttests <- as.data.table(senate_krls_sum_lag2$ttests)
senate_krls_sum_lag2$percentiles <-
  as.data.table(senate_krls_sum_lag2$percentiles)

load.bigKRLS(paste0(results_path, "house_krls_sum_lag2"),
  newname = "house_krls_sum_lag2")
house_krls_sum_lag2 <- summary(house_krls_sum_lag2)
house_krls_sum_lag2$ttests <- as.data.table(house_krls_sum_lag2$ttests)
house_krls_sum_lag2$percentiles <-
  as.data.table(house_krls_sum_lag2$percentiles)

if (!file.exists(paste0(results_path, "senate_krls_model_lag2"))) {
  sen_X_lag2 <- cbind(sen_X_lag, data.matrix(senate_data_lag[, lag_DSA]))
  colnames(sen_X_lag2)[ncol(sen_X_lag2)] <- "lag_DSA"
  ok <- complete.cases(sen_X_lag2)
  sen_X_lag2 <- sen_X_lag2[ok, ]
  sen_y_lag2 <- sen_y_lag[ok, , drop = FALSE]
  senate_krls_model_lag2 <- bigKRLS::bigKRLS(X = sen_X_lag2,
    y = sen_y_lag2, noisy = TRUE)
  summary(senate_krls_model_lag2)

  hou_X_lag2 <- cbind(hou_X_lag, data.matrix(house_data_lag[, lag_DSA]))
  colnames(hou_X_lag2)[ncol(hou_X_lag2)] <- "lag_DSA"
  ok <- complete.cases(hou_X_lag2)
  hou_X_lag2 <- hou_X_lag2[ok, ]
  hou_y_lag2 <- hou_y_lag[ok, , drop = FALSE]
  house_krls_model_lag2 <- bigKRLS::bigKRLS(X = hou_X_lag2,
    y = hou_y_lag2, noisy = TRUE)
  summary(house_krls_model_lag2)

  save.bigKRLS(house_krls_model_lag2,
    model_subfolder_name = paste0(results_path, "house_krls_model_lag2"))
  save.bigKRLS(senate_krls_model_lag2,
    model_subfolder_name = paste0(results_path, "senate_krls_model_lag2"))
} else {
  load.bigKRLS(paste0(results_path, "house_krls_model_lag2"),
    newname = "house_krls_model_lag2")
  load.bigKRLS(paste0(results_path, "senate_krls_model_lag2"),
    newname = "senate_krls_model_lag2")
}
